**** Define macros ****
* Project root. Edit this path before running; every input and output file
* in this do-file is resolved relative to it.
global dir "YOUR PATH HERE"

* Local alias of the project root. Later commands refer to the root through
* the local macro dir, which was never defined (only the global was), so
* those paths expanded to "/<file>" rather than "<root>/<file>".
local dir "${dir}"

* Folders holding the occupation and industry crosswalk files
local occdir "${dir}/xwalk_occ"
local inddir "${dir}/xwalk_ind"

* Load the raw extract (variable-name case preserved) and run the
* accompanying value-label do-file
import delimited "`dir'/nlsy79_learning.csv", case(preserve) clear 
do "`dir'/nlsy79_value-labels.do"
** Code variables as missing if values are less than zero
* Only numeric variables are touched: a numeric comparison on a string
* variable would stop the do-file with a type-mismatch error.
quietly ds, has(type numeric)
local numvars `r(varlist)'
foreach var of local numvars {
	replace `var'=. if `var'<0
}
*drop military sample
drop if esr_key_1979==8

** Respondent identifier
rename caseid_1979 caseid

** Demographics, copied from the raw variables
gen race = sample_race_78scrn
gen sex = sample_sex_1979
gen age1979 = fam_1b_1979

* Age in every later wave = 1979 age plus the number of years elapsed
* (annual waves 1980-1994, then every second year 1996-2020)
foreach yr of numlist 1980/1994 1996(2)2020 {
	gen age`yr' = age1979 + (`yr' - 1979)
}

** Urbanicity and region
* One urban / div / metro triple per wave, created in that order within each
* year (annual 1979-1994, every second year 1996-2018).
foreach yr of numlist 1979/1994 1996(2)2018 {
	gen urban`yr' = urban_rural_`yr'
	gen div`yr' = region_`yr'
	gen metro`yr' = smsares_`yr'
}

** Highest grade completed
* educ is the row maximum over every variable whose name starts with hgcrev;
* educ_alt is a straight copy of hgc_ever.
egen educ = rowmax(hgcrev*)
gen educ_alt = hgc_ever

** Employment since date of last interview (DLI)
* The raw variable is renamed to emp<year> and collapsed to an indicator:
* 0 stays 0 and values 1 through 20 become 1.
foreach yr of numlist 1979/1994 1996(2)2018 {
	rename lastint_jobs_`yr'_xrnd emp`yr'
	recode emp`yr' (0=0) (1/20=1)
}
/*
** Hourly rate of pay
forvalues y=1979(1)1994 {
	gen wage`y'=cpshrp_`y'/100
}
forvalues y=1996(2)2018 {
	gen wage`y'=hrp1_`y'/100
}
*/
** Hourly rate of pay
* Each raw hrp value is divided by 100 and wages below 1 are set to missing.
* Job 1 is stored as wage<year>; jobs 2-5 are stored as wage<job>_<year>.
* A top-code at 200 used to be applied here but is disabled at this stage.
foreach yr of numlist 1979/1994 1996(2)2020 {
	gen wage`yr' = hrp1_`yr'/100
	replace wage`yr' = . if wage`yr' < 1
	forvalues job = 2/5 {
		local w wage`job'_`yr'
		gen `w' = hrp`job'_`yr'/100
		replace `w' = . if `w' < 1
	}
}
*Wage income
* The raw item carries three different names across waves; all of them are
* renamed to incwage<year>.
forvalues yr = 1979/1981 {
	rename q13_5_`yr' incwage`yr'
}
foreach yr of numlist 1982/1994 1996(2)2000 {
	rename q13_5_trunc_revised_`yr' incwage`yr'
}
forvalues yr = 2002(2)2018 {
	rename q13_5_trunc_`yr' incwage`yr'
}
*school enrollment
* schl<year> is 1 when the enrollment-status variable equals 2 or 3 and 0
* otherwise (a missing status therefore also gives 0).
foreach yy of numlist 79/94 96 98 {
	gen schl19`yy' = inlist(enrollmtrev`yy'_19`yy', 2, 3)
}
forvalues d = 0(2)6 {
	gen schl200`d' = inlist(enrollmtrev0`d'_200`d', 2, 3)
}
* schl_max: latest wave between 1979 and 1998 in which schl equals 1.
* Waves are visited from newest to oldest and only still-empty cells are filled.
gen schl_max = .
foreach yr of numlist 1998 1996 1994(-1)1979 {
	replace schl_max = `yr' if schl`yr'==1 & schl_max==.
}

*alternate schl_max - full-time only
* Bring the q3_24 item to one naming pattern, and create all-missing
* placeholders for 1987 and 1991 so the year loop can reference every wave.
foreach yr of numlist 1986 1993 1994(2)2002 {
	rename q3_24_01_`yr' q3_24_1_`yr'
}
foreach yr in 1987 1991 {
	gen q3_24_1_`yr' = .
}

* Same newest-to-oldest search as schl_max, but a wave is skipped when its
* q3_24 item equals 2. Anyone left without a value falls back to schl_max.
gen schl_max_full = .
foreach yr of numlist 1998 1996 1994(-1)1979 {
	replace schl_max_full = `yr' if schl`yr'==1 & schl_max_full==. & q3_24_1_`yr'!=2
}
replace schl_max_full = schl_max if schl_max_full==.

* Hourly wage excluding those enrolled in school
* wage_noschl<year> copies the job-1 wage unless the enrollment-status
* variable equals 2 or 3 (a missing status keeps the wage). Built for the
* waves 1979-1998 and 2000-2006 only.
foreach yy of numlist 79/94 96 98 {
	gen wage_noschl19`yy' = wage19`yy' if !inlist(enrollmtrev`yy'_19`yy', 2, 3)
}
forvalues d = 0(2)6 {
	gen wage_noschl200`d' = wage200`d' if !inlist(enrollmtrev0`d'_200`d', 2, 3)
}

** Occupation: Apply occ1990dd crosswalk

*this applies most current census occ coding to current occ held
*occs in jobs 2-5 use census 1970 codes from 1979-2000, then 2000 codes from 2002-2020

* Remove extra digit for occ codes beginning in 2004
* For every job slot 1-5 in the waves 2004-2020 the code is converted to a
* string, its last character is dropped, and the result is converted back to
* a number. A missing code ends up missing again.
forvalues y=2004(2)2020 {
	forvalues z=1(1)5 {
		* park the original values under a scratch name
		rename occall_emp_0`z'_`y' occall_4d
		* all characters except the last one
		gen occall_emp_0`z'_`y'=substr(string(occall_4d), 1, length(string(occall_4d))-1)
		destring occall_emp_0`z'_`y', replace
		drop occall_4d
	}
}

* Every merge in this section follows the same steps: the raw code is renamed
* to occ (the key of the crosswalk file), occ1990dd is merged in keeping all
* master rows, codes left without an occ1990dd are listed, and both variables
* are then renamed with their job and year suffix.

* Job 1, 1979-1981: occ70 crosswalk
forvalues yr = 1979/1981 {
	rename cpsocc70_`yr' occ
	merge m:1 occ using "`occdir'/occ1970_occ1990dd.dta", keep(master match) nogen
	display "Occupations not matched to occ1990dd: `yr'"
	levelsof occ if occ1990dd==.
	rename occ occ`yr'
	rename occ1990dd occ1990dd`yr'
}

* Job 1, 1982-2000: occ80 crosswalk
* The 2000 variable has an extra _01 in its name; align it first.
rename cpsocc80_01_2000 cpsocc80_2000
foreach yr of numlist 1982/1994 1996(2)2000 {
	rename cpsocc80_`yr' occ
	merge m:1 occ using "`occdir'/occ1980_occ1990dd.dta", ///
		keep(master match) nogen
	display "Occupations not matched to occ1990dd: `yr'"
	levelsof occ if occ1990dd==.
	rename occ occ`yr'
	rename occ1990dd occ1990dd`yr'
}

* Jobs 2-5, 1979-2000: occ70 crosswalk
foreach yr of numlist 1979/1994 1996(2)2000 {
	forvalues job = 2/5 {
		rename occall_emp_0`job'_`yr' occ
		merge m:1 occ using "`occdir'/occ1970_occ1990dd.dta", ///
			keep(master match) nogen
		display "Occupations not matched to occ1990dd: `yr'"
		levelsof occ if occ1990dd==.
		rename occ occ`job'_`yr'
		rename occ1990dd occ1990dd`job'_`yr'
	}
}

* Job 1, 2002-2020: occ2000 crosswalk
forvalues yr = 2002(2)2020 {
	rename occall_emp_01_`yr' occ
	merge m:1 occ using "`occdir'/occ2000_occ1990dd.dta", ///
		keep(master match) nogen
	display "Occupations not matched to occ1990dd: `yr'"
	levelsof occ if occ1990dd==.
	rename occ occ`yr'
	rename occ1990dd occ1990dd`yr'
}

* Jobs 2-5, 2002-2020: occ2000 crosswalk
forvalues yr = 2002(2)2020 {
	forvalues job = 2/5 {
		rename occall_emp_0`job'_`yr' occ
		merge m:1 occ using "`occdir'/occ2000_occ1990dd.dta", ///
			keep(master match) nogen
		display "Occupations not matched to occ1990dd: `yr'"
		levelsof occ if occ1990dd==.
		rename occ occ`job'_`yr'
		rename occ1990dd occ1990dd`job'_`yr'
	}
}
** Industry: Apply ind6090 crosswalk
* Same pattern for every wave: rename the raw code to the key used by the
* crosswalk file, merge in ind6090 keeping all master rows, list the codes
* left unmatched, then rename both variables with the year suffix.

* 1979-1981: ind70 crosswalk
forvalues yr = 1979/1981 {
	rename cpsind70_`yr' ind70
	merge m:1 ind70 using "`inddir'/ind70.dta", keep(master match) nogen ///
		keepusing(ind70 ind6090)
	display "Industries not matched to ind6090: `yr'"
	levelsof ind70 if ind6090==.
	rename ind70 ind`yr'
	rename ind6090 ind6090`yr'
}

* 1982-2000: ind80 crosswalk
* The 2000 variable has an extra _01 in its name; align it first.
rename cpsind80_01_2000 cpsind80_2000
foreach yr of numlist 1982/1994 1996(2)2000 {
	rename cpsind80_`yr' ind80
	merge m:1 ind80 using "`inddir'/ind80.dta", keep(master match) nogen ///
		keepusing(ind80 ind6090)
	display "Industries not matched to ind6090: `yr'"
	levelsof ind80 if ind6090==.
	rename ind80 ind`yr'
	rename ind6090 ind6090`yr'
}

* 2002-2018: ind00 crosswalk
* In the waves 2004 through 2012 the code is divided by 10 and rounded down
* before merging.
forvalues yr = 2002(2)2018 {
	rename indall_emp_01_`yr' ind00
	if inrange(`yr', 2004, 2012) {
		replace ind00 = floor(ind00/10)
	}
	merge m:1 ind00 using "`inddir'/ind00.dta", keep(master match) nogen
	display "Industries not matched to ind6090: `yr'"
	levelsof ind00 if ind6090==.
	rename ind00 ind`yr'
	rename ind6090 ind6090`yr'
}

*Create variable for first occupation after school enrollment
* first_occ uses schl_max and first_occ_full uses schl_max_full as the last
* enrolled year. Each takes the occ1990dd code of the next wave and, when
* that is missing, the code of the wave after it.
gen first_occ=.
gen first_occ_full=.
* Placeholder so the 1993 iteration can look two years ahead: no
* occ1990dd1995 is built anywhere above. It is dropped again below.
gen occ1990dd1995=.
* Annual waves: next wave is one year on, fallback is two years on
forvalues y=1979(1)1993 {
	local z=`y'+1
	local a=`y'+2
	replace first_occ=occ1990dd`z' if schl_max==`y'
	replace first_occ=occ1990dd`a' if schl_max==`y' & first_occ==.
	replace first_occ_full=occ1990dd`z' if schl_max_full==`y'
	* The fallback must test first_occ_full itself. It used to test
	* first_occ, which overwrote valid values or skipped the fallback.
	replace first_occ_full=occ1990dd`a' if schl_max_full==`y' & first_occ_full==.
}
* Biennial waves: next wave is two years on, fallback is four years on
forvalues y=1994(2)2006 {
	local z=`y'+2
	local a=`y'+4
	replace first_occ=occ1990dd`z' if schl_max==`y'
	replace first_occ=occ1990dd`a' if schl_max==`y' & first_occ==.
	replace first_occ_full=occ1990dd`z' if schl_max_full==`y'
	* Same correction as in the annual loop
	replace first_occ_full=occ1990dd`a' if schl_max_full==`y' & first_occ_full==.
}
drop occ1990dd1995

** Social skills composite

* School activity participation
* The six raw items school_46_000001 to school_46_000006 (1984) are copied
* in this order.
local item = 0
foreach act in youth_org hobby stugov newsp athletics perfarts {
	local ++item
	gen `act' = school_46_00000`item'_1984
}

* Sociability
gen social_age6 = health_soc_1_1985
gen social_adult = health_soc_2_1985

* Number of clubs, including zero
* Each _cat indicator is 1 when the activity item is non-missing, else 0.
foreach act in youth_org hobby stugov newsp athletics perfarts {
	gen `act'_cat = (`act' != .)
}
egen num_clubs = rowtotal(youth_org_cat hobby_cat stugov_cat newsp_cat athletics_cat perfarts_cat)
* NOTE(review): athletics_cat is a 0/1 indicator and is never missing, so the
* next line changes nothing as written - confirm what was intended.
replace num_clubs = . if athletics_cat == .

* Standardize each component to mean 0 and standard deviation 1
foreach v in social_age6 social_adult num_clubs athletics_cat {
	egen `v'_std = std(`v'), mean(0) std(1)
}

* Composite 1: 4 elements (use in NLSY79-only analyses)
egen soc_nlsy = rowmean(social_age6_std social_adult_std athletics_cat_std num_clubs_std)
egen soc_nlsy_std = std(soc_nlsy), mean(0) std(1)

* Composite 2: 2 elements (use in analyses with NLSY79 & NLSY97)
egen soc_nlsy2 = rowmean(social_age6_std social_adult_std)
egen soc_nlsy2_std = std(soc_nlsy2), mean(0) std(1)

** Non-Cognitive measures: Rotter & Rosenberg score
* Only the Rotter score is processed here: it is standardized and then its
* sign is flipped.
egen rotter_std = std(rotter_score_1979), mean(0) std(1)
replace rotter_std = -rotter_std
*risk aversion
gen risk_averse = risk_4_2010

*Job tenure*
* Strip the separators from the job-1 tenure and the jobsnum names so that
* both end directly in the year: tenure1_<year> becomes tenure<year> and
* jobsnum_<year> becomes jobsnum<year>. Tenure for jobs 2-5 is left as is.
foreach yr of numlist 1979/1994 1996(2)2020 {
	rename tenure1_`yr' tenure`yr'
}
foreach yr of numlist 1979/1994 1996(2)2020 {
	rename jobsnum_`yr' jobsnum`yr'
}

*Firm size
* firmsz1 becomes multemp<year> and firmsz2 becomes numemp<year>.
* Waves handled: 1979-1980 and 1986-1993 (cps_ prefix), then every second
* year from 1994 to 2018 (job-1 item without the cps_ prefix).
foreach yr of numlist 1979 1980 1986/1993 {
	rename cps_qes_firmsz1_`yr' multemp`yr'
	rename cps_qes_firmsz2_`yr' numemp`yr'
}
forvalues yr = 1994(2)2018 {
	rename qes_firmsz1_01_`yr' multemp`yr'
	rename qes_firmsz2_01_`yr' numemp`yr'
}

*training variables
* The 1980 and 1981 items carry an extra _0 in their names; align them first.
foreach yr in 1980 1981 {
	rename q8_18_0_`yr' q8_18_`yr'
}
* train<year>: waves 1979-1986, 1988-1994 and every second year 1996-2018
* (1987 is not processed)
foreach yr of numlist 1979/1986 1988/1994 1996(2)2018 {
	rename q8_18_`yr' train`yr'
}
* train_type<year>: the source item is q8_20_01 through 2000 and q8_20a_01
* from 2002 onward
foreach yr of numlist 1979/1986 1988/1994 1996(2)2000 {
	rename q8_20_01_`yr' train_type`yr'
}
forvalues yr = 2002(2)2018 {
	rename q8_20a_01_`yr' train_type`yr'
}

*Construct measure of months since last interview
* For the 1980 wave the year of the last interview is set to 1979.
gen lintdate_y_1980 = 1979
* The 1981-1993 last-interview years are shifted by 1900 so they can be
* subtracted from the four-digit wave year.
forvalues yr = 1981/1993 {
	replace lintdate_y_`yr' = lintdate_y_`yr' + 1900
}
* months = 12 * (wave year - last interview year)
*          + (current interview month - last interview month)
foreach yr of numlist 1980/1994 1996(2)2012 {
	gen months_sli`yr' = 12*(`yr' - lintdate_y_`yr') + (curdate_m_`yr' - lintdate_m_`yr')
}
* From 2014 on only the year of the last interview is used, i.e. both
* interviews are assumed to fall in the same month.
forvalues yr = 2014(2)2018 {
	gen months_sli`yr' = 12*(`yr' - symbol_lintyear_`yr')
}

*Construct work experience variable using reported hours and weeks worked
*one measure uses just last year and assumes representativeness, the other uses "since last interview"*
*computed as fractional, so 1 equals 52*40=2080 hours for one-year interval, 4160 for 2 year interval

* Past-calendar-year measure: hours / 2080, with missing treated as zero.
* maxexp caps the same measure at 1.
foreach yr of numlist 1979/1994 1996(2)2020 {
	gen wkexp_yr`yr' = hrswk_pcy_`yr'/2080
	replace wkexp_yr`yr' = 0 if wkexp_yr`yr'==.
	gen maxexp_yr`yr' = min(wkexp_yr`yr', 1)
}

*Now compute work experience since last interview, using months measure above
*measure is the same as pcy for 1979
gen wkexp_sli1979 = wkexp_yr1979
gen maxexp_sli1979 = maxexp_yr1979
* Hours since the last interview are annualized with the interval length in
* years before dividing by 2080; missing results are again set to zero.
foreach yr of numlist 1980/1994 1996(2)2018 {
	gen yrs_sli`yr' = months_sli`yr'/12
	gen wkexp_sli`yr' = (hrswk_sli_`yr'/yrs_sli`yr')/2080
	replace wkexp_sli`yr' = 0 if wkexp_sli`yr'==.
	gen maxexp_sli`yr' = min(wkexp_sli`yr', 1)
}
*create indicator for first year working at least half/nearly full time (eg 1040 hours and 1500 hours)
* Waves are visited from oldest to newest and a cell is filled only once, so
* each variable holds the first wave whose past-year measure reaches the
* threshold (0.5 for half, 0.75 for full).
gen wk_start_half = .
gen wk_start_full = .
foreach yr of numlist 1979/1994 1996(2)2018 {
	replace wk_start_half = `yr' if wk_start_half==. & wkexp_yr`yr'>=0.5
	replace wk_start_full = `yr' if wk_start_full==. & wkexp_yr`yr'>=0.75
}


*Create measures of tenure by job*
*missing for people still employed at job, so fill in current date
* job_tenure<job>_<year> = 12 * (stop year - start year) + (stop month -
* start month), floored at zero.

* 1979-1993: years are put on a two-digit basis first (1900 is subtracted
* from any value above 1900); an open stop date is filled with the two-digit
* wave year and the current interview month.
forvalues yy = 79/93 {
	forvalues job = 1/5 {
		local start_y employer_startdate_0`job'_y_19`yy'
		local start_m employer_startdate_0`job'_m_19`yy'
		local stop_y employer_stopdate_0`job'_y_19`yy'
		local stop_m employer_stopdate_0`job'_m_19`yy'
		replace `start_y' = `start_y'-1900 if `start_y'>1900 & `start_y'!=.
		replace `stop_y' = `stop_y'-1900 if `stop_y'>1900 & `stop_y'!=.
		replace `stop_y' = `yy' if `stop_y'==. & `start_y'!=.
		replace `stop_m' = curdate_m_19`yy' if `stop_m'==. & `start_m'!=.
		gen job_tenure`job'_19`yy' = 12*(`stop_y' - `start_y') + (`stop_m' - `start_m')
		replace job_tenure`job'_19`yy' = 0 if job_tenure`job'_19`yy'<0
	}
}
*fix weird 1996 startdate issue
* A 1996 job with a stop date but no start date gets start year 1994 and the
* current interview month as start month.
forvalues job = 1/5 {
	replace employer_startdate_0`job'_y_1996 = 1994 if employer_startdate_0`job'_y_1996==. & employer_stopdate_0`job'_y_1996!=.
	replace employer_startdate_0`job'_m_1996 = curdate_m_1996 if employer_startdate_0`job'_m_1996==. & employer_stopdate_0`job'_m_1996!=.
}
* 1994-2018: an open stop date is filled with the wave year and the current
* interview month.
forvalues yr = 1994(2)2018 {
	forvalues job = 1/5 {
		local start_y employer_startdate_0`job'_y_`yr'
		local start_m employer_startdate_0`job'_m_`yr'
		local stop_y employer_stopdate_0`job'_y_`yr'
		local stop_m employer_stopdate_0`job'_m_`yr'
		replace `stop_y' = `yr' if `stop_y'==. & `start_y'!=.
		replace `stop_m' = curdate_m_`yr' if `stop_m'==. & `start_m'!=.
		gen job_tenure`job'_`yr' = 12*(`stop_y' - `start_y') + (`stop_m' - `start_m')
		replace job_tenure`job'_`yr' = 0 if job_tenure`job'_`yr'<0
	}
}

*Currently working in job
* cur_work<job>_<year> copies the qes_23 item when it is 0 or 1 and is
* missing for any other value.
foreach yr of numlist 1979/1994 1996(2)2020 {
	forvalues job = 1/5 {
		gen cur_work`job'_`yr' = qes_23_0`job'_`yr' if inlist(qes_23_0`job'_`yr', 0, 1)
	}
}
*Job changes and wage changes*
*Collapse reasons into 1) layoffs; 2) firings; 3) voluntary quits of any kind
* The reason codes of qes_23a are grouped differently by period:
*   1979-1983  layoff = 1 or 3        fire = 2   quit = 4 and above
*   1984-1994  layoff = 1, 2, 3 or 5  fire = 4   quit = 6 and above
*   1996-2018  layoff = 1, 2 or 3     fire = 4   quit = 6 and above
* NOTE(review): when the reason is missing, the 1979-1983 indicators and all
* quit indicators are 0, whereas the layoff and fire indicators from 1984 on
* are missing - confirm that this difference is intended.
* All layoff variables are created first, then all quit variables, then all
* fire variables; later variable-range references rely on that order.

*layoff/plant closing code
forvalues yr = 1979/1983 {
	forvalues job = 1/5 {
		gen layoff_`job'_`yr' = inlist(qes_23a_0`job'_`yr', 1, 3)
	}
}
forvalues yr = 1984/1994 {
	forvalues job = 1/5 {
		gen layoff_`job'_`yr' = inlist(qes_23a_0`job'_`yr', 1, 2, 3, 5) if qes_23a_0`job'_`yr'!=.
	}
}
forvalues yr = 1996(2)2018 {
	forvalues job = 1/5 {
		gen layoff_`job'_`yr' = inlist(qes_23a_0`job'_`yr', 1, 2, 3) if qes_23a_0`job'_`yr'!=.
	}
}

* voluntary quits
forvalues yr = 1979/1983 {
	forvalues job = 1/5 {
		gen quit_`job'_`yr' = qes_23a_0`job'_`yr'>=4 & qes_23a_0`job'_`yr'!=.
	}
}
foreach yr of numlist 1984/1994 1996(2)2018 {
	forvalues job = 1/5 {
		gen quit_`job'_`yr' = qes_23a_0`job'_`yr'>=6 & qes_23a_0`job'_`yr'!=.
	}
}

* firings
forvalues yr = 1979/1983 {
	forvalues job = 1/5 {
		gen fire_`job'_`yr' = qes_23a_0`job'_`yr'==2
	}
}
foreach yr of numlist 1984/1994 1996(2)2018 {
	forvalues job = 1/5 {
		gen fire_`job'_`yr' = (qes_23a_0`job'_`yr'==4) if qes_23a_0`job'_`yr'!=.
	}
}
*any layoff or quit in a given year*
* Row maximum over the five job slots of a wave, for each separation type.
* The variable range relies on slots 1 to 5 of a wave being stored next to
* each other.
foreach kind in layoff quit fire {
	foreach yr of numlist 1979/1994 1996(2)2018 {
		egen `kind'`yr' = rowmax(`kind'_1_`yr'-`kind'_5_`yr')
	}
}

*Job 1 is the most recent job, so wage changes relative to job X will be X minus X+1*
*begin with first jobs - next wage observation not available until following year
* For each separation type:
*   1. across waves: job-1 wage of this wave minus job-1 wage of the previous
*      wave, when job 1 of the previous wave ended for that reason. 1979 and
*      1996 start out missing.
*   2. within a wave: still-empty cells are filled with the wage of job X
*      minus the wage of job X+1 when job X+1 ended for that reason, trying
*      X = 1 first. A scratch copy wage1_<year> of the job-1 wage lets the
*      same naming pattern cover X = 1; it is dropped straight away.
foreach kind in layoff quit fire {
	gen `kind'_change1979 = .
	forvalues yr = 1980/1994 {
		local prev = `yr' - 1
		gen `kind'_change`yr' = wage`yr' - wage`prev' if `kind'_1_`prev'==1
	}
	gen `kind'_change1996 = .
	forvalues yr = 1998(2)2018 {
		local prev = `yr' - 2
		gen `kind'_change`yr' = wage`yr' - wage`prev' if `kind'_1_`prev'==1
	}
	foreach yr of numlist 1979/1994 1996(2)2018 {
		gen wage1_`yr' = wage`yr'
		forvalues newer = 1/4 {
			local older = `newer' + 1
			replace `kind'_change`yr' = wage`newer'_`yr' - wage`older'_`yr' if `kind'_change`yr'==. & `kind'_`older'_`yr'==1
		}
		drop wage1_`yr'
	}
}
* The job-level indicators are no longer needed; they sit in one contiguous
* run of variables.
drop layoff_1_1979- fire_5_2018
*create lags and leads for event study
* For a separation type x and an event wave t with a non-missing x_change,
* x_Kchange<t> is the job-1 wage K-1 waves after t minus the job-1 wage one
* wave before t (waves are one year apart through 1994, two years apart from
* 1996). The year ranges stop where the later wage is no longer available.

* K = 2: one wave after the event
foreach x in layoff quit fire {
	forvalues y=1980(1)1993 {
		local z=`y'+1
		local a=`y'-1
		gen `x'_2change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
	forvalues y=1996(2)2016 {
		local z=`y'+2
		local a=`y'-2
		gen `x'_2change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
}
* K = 3: two waves after the event
* fire was missing from this list, so fire_3change was never built although
* fire_2change and fire_4change are, and the reshape further down lists it.
foreach x in layoff quit fire {
	forvalues y=1980(1)1992 {
		local z=`y'+2
		local a=`y'-1
		gen `x'_3change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
	forvalues y=1996(2)2014 {
		local z=`y'+4
		local a=`y'-2
		gen `x'_3change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
}
* K = 4: three waves after the event
foreach x in layoff quit fire {
	forvalues y=1980(1)1991 {
		local z=`y'+3
		local a=`y'-1
		gen `x'_4change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
	forvalues y=1996(2)2012 {
		local z=`y'+6
		local a=`y'-2
		gen `x'_4change`y'=wage`z'-wage`a' if `x'_change`y'!=.
	} 
}
*avg weekly hours worked by job
* Everything is renamed to hrs<job>_<year>.

* 1979: all five job slots come from qes_52a
forvalues job = 1/5 {
	rename qes_52a_0`job'_1979 hrs`job'_1979
}
*1980-1993, the cps variable keeps those who stayed in the same job across waves
* Job 1 uses the cps item and falls back to the job-1 qes item when the cps
* item is missing; jobs 2-5 come from qes_52a.
forvalues yr = 1980/1993 {
	rename cps_qes_52a_`yr' hrs1_`yr'
	replace hrs1_`yr' = qes_52a_01_`yr' if hrs1_`yr'==.
}
forvalues yr = 1980/1993 {
	forvalues job = 2/5 {
		rename qes_52a_0`job'_`yr' hrs`job'_`yr'
	}
}
* 1994 and 1996: all five job slots come from qes_52a
foreach yr in 1994 1996 {
	forvalues job = 1/5 {
		rename qes_52a_0`job'_`yr' hrs`job'_`yr'
	}
}
*1998-2018
forvalues yr = 1998(2)2018 {
	forvalues job = 1/5 {
		rename hours_worked_week_all_0`job'_`yr' hrs`job'_`yr'
	}
}

*clean unique job numbers to back out tenure and new job starting
* job<i>_yr holds the first four characters of the i-th unique job id,
* converted to a number (non-numeric results become missing). The raw
* variable names use a two-digit, zero-padded index.
forvalues i = 1/65 {
	local idx : display %02.0f `i'
	tostring uid_`idx'_xrnd, g(temp)
	gen job`i'_yr = substr(temp,1,4)
	destring job`i'_yr, force replace
	drop temp
}
* new_job<year>: 1 when any job<i>_yr equals the wave year; otherwise 0 for
* people with emp equal to 1 in that wave; otherwise missing.
foreach yr of numlist 1980/1993 1994(2)2018 {
	gen new_job`yr' = .
	forvalues i = 1/65 {
		replace new_job`yr' = 1 if job`i'_yr==`yr'
	}
	replace new_job`yr' = 0 if emp`yr'==1 & new_job`yr'!=1
}

** Restrict sample to selected variables and compress
* The urban range is positional: it runs from urban1979 to urban2018 in
* dataset order and so also spans the div and metro variables created between
* them. It previously stopped at urban2016, which dropped urban2018 although
* div and metro are kept through 2018 and the later reshape uses the urban
* stub for every wave.
keep caseid race sex age* urban1979-urban2018 div* metro* educ wage* incwage* occ* new_job* ///
	emp* multemp* numemp* ind* jobsnum* job_tenure* tenure* train* wkexp_yr* wkexp_sli* maxexp_yr* maxexp_sli* cur_work* wk_start_full wk_start_half ///
	hrs1* hrs2* hrs3* hrs4* hrs5* layoff* quit* fire* schl_max schl_max_full first_occ first_occ_full soc_nlsy_std soc_nlsy2_std rotter_std
compress

*calculate cumulative work experience
* Running total of each experience measure (wk or max, yr or sli). An annual
* wave adds its value once; a wave from 1996 on adds its value twice because
* it covers two years.
foreach z in wk max {
	foreach x in yr sli {
		local stem `z'exp_`x'
		gen `stem'_total1979 = cond(`stem'1979==., 0, `stem'1979)
		forvalues yr = 1980/1994 {
			local prev = `yr' - 1
			gen `stem'_total`yr' = `stem'`yr' + `stem'_total`prev'
		}
		forvalues yr = 1996(2)2018 {
			local prev = `yr' - 2
			gen `stem'_total`yr' = (2*`stem'`yr') + `stem'_total`prev'
		}
	}
}
* Only the past-year measures exist for 2020, so only they are extended.
foreach z in wk max {
	gen `z'exp_yr_total2020 = (2*`z'exp_yr2020) + `z'exp_yr_total2018
}


*calculate work experience up to age 18
* Same running total as the cumulative measure, except that the value is set
* to 0 in every wave in which age exceeds 18 (a missing age counts as
* exceeding 18).
* NOTE(review): because of that reset the variable is 0 in all waves after
* age 18 instead of carrying the youth total forward - confirm that later
* code expects this (for example by taking the maximum within person).
foreach z in wk max {
	foreach x in yr sli {
		local stem `z'exp_`x'
		gen `stem'_youth1979 = cond(age1979>18 | `stem'1979==., 0, `stem'1979)
		forvalues yr = 1980/1994 {
			local prev = `yr' - 1
			gen `stem'_youth`yr' = cond(age`yr'>18, 0, `stem'`yr' + `stem'_youth`prev')
		}
		forvalues yr = 1996(2)2018 {
			local prev = `yr' - 2
			gen `stem'_youth`yr' = cond(age`yr'>18, 0, (2*`stem'`yr') + `stem'_youth`prev')
		}
	}
}
** Reshape data long for main analysis
* One row per person and wave. Stubs ending in an underscore belong to the
* job-specific variables (job number before the underscore, year after it).
rename caseid id
reshape long ///
	age urban div metro ///
	wage wage2_ wage3_ wage4_ wage5_ wage_noschl incwage ///
	occ occ2_ occ3_ occ4_ occ5_ ///
	occ1990dd occ1990dd2_ occ1990dd3_ occ1990dd4_ occ1990dd5_ ///
	emp new_job ind ind6090 ///
	job_tenure1_ job_tenure2_ job_tenure3_ job_tenure4_ job_tenure5_ ///
	tenure tenure2_ tenure3_ tenure4_ tenure5_ ///
	jobsnum multemp numemp train train_type ///
	wkexp_yr wkexp_yr_total wkexp_sli wkexp_sli_total ///
	maxexp_yr maxexp_yr_total maxexp_sli maxexp_sli_total ///
	wkexp_yr_youth wkexp_sli_youth maxexp_yr_youth maxexp_sli_youth ///
	cur_work1_ cur_work2_ cur_work3_ cur_work4_ cur_work5_ ///
	layoff quit fire ///
	layoff_change layoff_2change layoff_3change layoff_4change ///
	quit_change quit_2change quit_3change quit_4change ///
	fire_change fire_2change fire_3change fire_4change ///
	hrs1_ hrs2_ hrs3_ hrs4_ hrs5_, i(id) j(year)
* Leftover wide variables that are not needed in the panel
drop employer_startdate* employer_stopdate* occall*
* Remove the trailing underscore from the job-1 names
foreach stub in job_tenure cur_work hrs {
	rename `stub'1_ `stub'1
}
* Measures whose job-1 version has no job number get an explicit copy
foreach stub in wage occ occ1990dd tenure {
	gen `stub'1 = `stub'
}
* Remove the trailing underscore from the names for jobs 2-5
forvalues job = 2/5 {
	foreach stub in wage job_tenure tenure occ occ1990dd cur_work hrs {
		rename `stub'`job'_ `stub'`job'
	}
}

* Declare the panel; all lag references below work on rows sorted by id and year
tsset id year
sort id year

* emp_same: among the employed, 1 when jobsnum equals the value in the
* previous row and 0 otherwise; always 0 in 1979
gen emp_same = (jobsnum==jobsnum[_n-1]) if emp==1
replace emp_same = 0 if year==1979

* Tenure measures: years elapsed since an earlier row of the same person with
* the same jobsnum (emp_tenure), the same occ1990dd (occ_tenure), or both
* (emp_occ_tenure). Lags 1 to 27 are tried in turn and later matches
* overwrite earlier ones, so the most distant match wins. Employed rows
* without any match get 0.
gen emp_tenure = .
forvalues lag = 1/27 {
	replace emp_tenure = year-year[_n-`lag'] if jobsnum==jobsnum[_n-`lag'] & emp==1 & id==id[_n-`lag']
}
replace emp_tenure = 0 if emp_tenure==. & emp==1

gen occ_tenure = .
forvalues lag = 1/27 {
	replace occ_tenure = year-year[_n-`lag'] if occ1990dd==occ1990dd[_n-`lag'] & emp==1 & id==id[_n-`lag']
}
replace occ_tenure = 0 if occ_tenure==. & emp==1

gen emp_occ_tenure = .
forvalues lag = 1/27 {
	replace emp_occ_tenure = year-year[_n-`lag'] if jobsnum==jobsnum[_n-`lag'] & occ1990dd==occ1990dd[_n-`lag'] & emp==1 & id==id[_n-`lag']
}
replace emp_occ_tenure = 0 if emp_occ_tenure==. & emp==1

*now track continuous work experience
* exp_tenure starts at 1 for an employed row that follows an employed row of
* the same person; each pass raises it to n where the previous row holds
* n-1. Employed rows that never qualify get 0.
gen exp_tenure = 1 if emp==1 & emp[_n-1]==1 & id==id[_n-1]
forvalues n = 2/37 {
	local before = `n' - 1
	replace exp_tenure = `n' if exp_tenure[_n-1]==`before' & emp==1 & id==id[_n-1]
}
replace exp_tenure = 0 if exp_tenure==. & emp==1

** ASVAB - Use Altonji, Bharadwaj and Lange (2009) file that gives age-adjusted 
*	comparability across NLSY surveys*
* Both external files are keyed on pid and sample; pid is a copy of id and
* sample is set to 0 for every row.
* The file paths use the global dir: the local macro of the same name is
* not defined in this do-file, so a path built from it started at "/".
gen pid=id
gen sample=0
* The using file has its own age variable; park the panel age under another
* name during the merge and store the merged one as age_test.
rename age age_temp
merge m:1 pid sample using "${dir}/afqt_adjusted_final.dta", keep(master match) ///
	keepusing(age weight pafqt afqt_std) nogen
rename age age_test
rename age_temp age
* Keep the merged score as afqt and re-standardize it to mean 0 and standard
* deviation 1 within this data
rename afqt_std afqt
egen afqt_std=std(afqt), mean(0) std(1)
merge m:1 pid sample using "${dir}/nlsy_majorselection_kahn.dta", keep(master match) ///
	keepusing(major stem) nogen
drop pid

*adjust wages for inflation
* Each wage measure is multiplied by a year-specific factor. The two lists
* below are parallel: the k-th factor belongs to the k-th year.
* NOTE(review): there is no factor for 2018 or 2020, so wages of those waves
* are left as reported - confirm that this is intended.
local cpi_years 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1996 1998 2000 2002 2004 2006 2008 2010 2012 2014 2016
local cpi_mult 3.38 3.01 2.71 2.58 2.51 2.40 2.33 2.29 2.20 2.11 2.02 1.90 1.84 1.79 1.74 1.69 1.60 1.54 1.45 1.39 1.33 1.24 1.15 1.16 1.09 1.06 1.05
local n_years : word count `cpi_years'
forvalues k = 1/`n_years' {
	local yr : word `k' of `cpi_years'
	local mult : word `k' of `cpi_mult'
	foreach v in wage wage1 wage2 wage3 wage4 wage5 incwage {
		replace `v' = `v'*`mult' if year==`yr'
	}
}
*trim wages 
* Hourly wages are bounded to the range 3 to 200; missing values stay
* missing. incwage is not trimmed.
foreach v in wage wage1 wage2 wage3 wage4 wage5 {
	replace `v' = 3 if `v'<3
	replace `v' = 200 if `v'>200 & `v'!=.
}
*create panel for combining
* male is 1 when sex equals 1 and 0 otherwise; the standardized AFQT score is
* renamed iq_std; nlsy79 flags every row as coming from this survey.
gen male=sex==1
rename afqt_std iq_std
gen nlsy79=1
drop ageatint_1979 sample weight age_test pafqt afqt
compress
* The path uses the global dir (the local macro of the same name is not
* defined in this do-file) and a forward slash, which Stata accepts on every
* operating system, whereas a backslash only works on Windows.
save "${dir}/nlsy79_learning_panel.dta", replace

